library(brms)
library(data.table)

# SET WORKING DIRECTORY
setwd("/mnt/c/Users/drmil/Dropbox/White House Lobbying/3YP/APSR Final/Replication Archive")

# READ IN DATA FOR ANALYSIS
obama_logs_analysis <- fread("obama_logs_analysis.csv", header = TRUE, stringsAsFactors = FALSE)

model_formula <- brmsformula(mvbind(any_visits_high_leq0,
                                    any_visits_low_leq0) ~ 
                               any_visits_high_leq0_1l +
                               any_visits_low_leq0_1l + 
                               log(lobbyexp_1l+1) + made_donations_l1tol8 + 
                               made_donations_l1tol8:log(amt_donations_1ltol8+1) +
                               industry_pid + inhouse_lobby +
                               ACC + ADV + AER + AGR + ALC + ANI + APP + ART +
                               AUT + AVI + BAN + BEV + BNK + BUD + CAW + CDT + CHM + CIV + COM +
                               CON + CPI + CPT + CSP + DEF + DIS + DOC + ECN + EDU + ENG + ENV +
                               FAM + FIN + FIR + FOO + FOR + FUE + GAM + GOV + HCR + HOM + HOU +
                               IMM + IND + INS + INT + LAW + LBR + MAN + MAR + MED + MIA + MIN +
                               MMM + MON + NAT + PHA + POS + REL + RES + RET + ROD + RRR + SCI +
                               SMB + SPO + TAR + TAX + TEC + TOB + TOR + TOU + TRA + TRD + TRF +
                               TRU + UNM + URB + UTI + VET + WAS + WEL +
                               (1|q|yearperiod) + 
                               (1|p|crp_orgname:crp_industry) + 
                               (1|r|crp_industry))

# USE MODEL FORMULA TO CREATE DATA FRAME TO ASSIGN PRIORS
priors_df <-get_prior(model_formula,
                      data = obama_logs_analysis, 
                      family=bernoulli(link = "logit"))

# ASSIGN DIFFUSE PRIORS FOR BETA COEFFICIENTS, VARYING INTERCEPTS FOR EACH
# GROUPING, AND CORRELATION AMONG EACH GROUPING OF VARYING INTERCEPTS
priors_df$prior <- ifelse(priors_df$class=="b", "normal(0, 10)", priors_df$prior)
priors_df$prior <- ifelse(priors_df$class=="cor", "lkj(1)", priors_df$prior)
priors_df$prior <- ifelse(priors_df$class=="sd"|priors_df$class=="Intercept", 
                          "student_t(3, 0, 10)", priors_df$prior)

# FITTING MODEL--NOTE THAT MODEL USES BOTH WITHIN- AND ACROSS-CHAIN 
# PARALLELIZATION
tableSI11_cols3and4 <- brm(model_formula,
                     data= obama_logs_analysis, 
                     family=bernoulli(link = "logit"), 
                     prior = priors_df,
                     refresh = 10, 
                     control=list(max_treedepth = 20),
                     threads = threading(threads = 3, static = TRUE), 
                     backend = "cmdstanr",
                     chains = 4, 
                     cores = 4, 
                     iter = 5000, 
                     warmup = 4000,
                     seed = 1989)

summary(tableSI11_cols3and4)

# model diagnostics
# any divergent transitions?
sum(nuts_params(tableSI11_cols3and4, pars = "divergent__")$Value)
# any rhats above 1.1?
sort(rhat(tableSI11_cols3and4), decreasing = TRUE)[1:10]
# neff ratio sufficiently large (>0.001 for all parameters?)
sort(neff_ratio(tableSI11_cols3and4), decreasing = FALSE)[1:10]
# NOTE THAT THE FILE COMPRESSION ARGUMENTS ARE CHANGED FOR THIS FILE; THIS WAS 
# NECESSARY TO ENSURE THE FILE SIZE DID NOT EXCEED THE DATAVERSE'S LIMITS
save(tableSI11_cols3and4, file="tableSI11_cols3and4.RData", compress = "bzip2", 
     compression_level = 9)